clear all
global outty "C:\Users\...."  // change to your repository file address

/* install these packages (on top of standard packages, like reghdfe)*/
* ssc uninstall egenmore
* ssc install egenmore


/* IMPORT THECB DATA ----------------------------------------------------------------------------------------------------------------------
	The data was manually exported variable by variable from the THECB website.
	Some variables are available annually and others by semesters, merges must take these differences into account.
	Only public universities and community colleges are exported/included.
    Texas Higher Education Accountability System available at: http://www.txhigheredaccountability.org/AcctPublic/InteractiveReport/ManageReports 
    All relevant data was redownloaded on Oct 14 2021. Still, not all semester-variable combinations were available through Spring 2021 at the time. */
/* +++ */
* DEGREES ================================================================================================
* a_num_degrees_econdisadv and regions (not used) --------------
import delimited "$outty\raw econ disadv RFS.csv", clear stringcols(2,3,4) numericcols(5) 
* Below must be done because the names changed in the 2019-2021 data, must remove blanks so that names match accross years
replace instlist=strrtrim(instlist) // remove trailing blanks
save "$outty\temp1", replace  

*a_total_degrees and regions and counties (school in disaster county)  --------------
import delimited "$outty\raw tot degree RFS.csv", clear stringcols(2,3,4,6) numericcols(5,7)  
replace instlist=strrtrim(instlist) // remove trailing blanks
merge m:1 dimyear instlist  using "$outty\temp1"
drop _merge 
g a_econdisadv_degreeshare = a_num_degrees_econdisadv / a_total_degrees
save "$outty\temp3", replace 
* need regions and disaster county designation to use spring 2021 enrollment data below
keep instlist regiondesc school_county school_in_disaster_county
drop if regiondesc==""
* drop dups
sort instlist
quietly by instlist:  gen dup = cond(_N==1,0,_n)
drop if dup>1
drop dup
save "$outty\school_region_list", replace 

* ENROLLMENT ================================================================================================
* s_enrollment and semester---------------
import delimited "$outty\raw enrollment RFS.csv", clear  stringcols(2,3,4) numericcols(5)
replace instlist=strrtrim(instlist) 
	* for schools with two summer semesters, collapse their enrollment together into one summer semester (summer gets dropped later anyway, so this doesn't matter)
	replace semesterdesc="Summer" if semesterdesc=="Summer I" | semesterdesc=="Summer II"
	collapse (sum) s_enrollment, by(dimyear insttypelist instlist semesterdesc) 
merge m:1 dimyear instlist  using "$outty\temp3"
drop _merge
order  dimyear semesterdesc instlist insttypelist   
sort  instlist dimyear
drop regiondesc school_county school_in_disaster_county
merge m:1 instlist using "$outty\school_region_list"
drop _merge
save "$outty\temp4", replace  

* s_enrollment_txtuitex and semester (not used) ------------------
import delimited "$outty\s_enrollment for tuition exempt texas students.csv", clear  stringcols(2,3,4) numericcols(5)
replace instlist=strrtrim(instlist) 
rename s_enrollment_txstud_tuitexempt s_enrollment_txtuitex
	replace semesterdesc="Summer" if semesterdesc=="Summer I" | semesterdesc=="Summer II"
	collapse (sum) s_enrollment_txtuitex, by(dimyear insttypelist instlist semesterdesc) 
sort instlist dimyear semesterdesc  
merge 1:1 instlist dimyear semesterdesc   using "$outty\temp4"
drop _merge
g s_txtuitex_enrollment_share =  s_enrollment_txtuitex / s_enrollment
save "$outty\temp4a", replace  


/* IMPORT TREATMENT VARIABLE ------------------------- 
  s_enrollment_disastercounty  enrollment by geosource.  We use the 40 counties designated as sufficiently damaged as to be eligible for
  individual assistance programs (about 20 additional counties received public assistance)
  https://www.fema.gov/disaster/4332 Designated Counties (Individual Assistance):
  Aransas, Austin, Bastrop, Bee, Brazoria, Caldwell, Calhoun, Chambers, Colorado, DeWitt, Fayette, Fort Bend, Galveston, Goliad, Gonzales, 
  Grimes, Hardin, Harris, Jackson, Jasper, Jefferson, Karnes, Kleberg, Lavaca, Lee, Liberty, Matagorda, Montgomery, Newton, Nueces, Orange,
  Polk, Refugio, Sabine, San Jacinto, San Patricio, Tyler, Victoria, Walker, Waller, Wharton */
import delimited "$outty\raw enrollment by geosource RFS.csv", clear numericcols(7,8,9,10,11,12,13) stringcols(2,3,4,5,6,8) 
drop locationdesc residencedesc
replace instlist=strrtrim(instlist) 
	replace semesterdesc="Summer" if semesterdesc=="Summer I" | semesterdesc=="Summer II"
	collapse (sum) s_enrollment_houston	s_enrollment_dallas	s_enrollment_disastercounty, by(dimyear insttypelist instlist semesterdesc) 
sort  instlist dimyear semesterdesc
merge 1:1 dimyear instlist semesterdesc using "$outty\temp4a"
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment
sort  instlist dimyear semesterdesc
g s_dallas_enrollment_share = s_enrollment_dallas / s_enrollment
g s_houston_enrollment_share = s_enrollment_houston / s_enrollment
g s_disaster_enrollment_share = s_enrollment_disastercounty / s_enrollment
drop _merge 
save "$outty\temp5", replace  

* s_enrollment_byraceBlackHispanic  enrollment by race (not used)
import delimited "$outty\raw enrollment by race RFS.csv", clear  stringcols(2,3,4,5) numericcols(6)
replace instlist=strrtrim(instlist) 
	replace semesterdesc="Summer" if semesterdesc=="Summer I" | semesterdesc=="Summer II"
	replace ethnicitydesc="BlackHispanic" if ethnicitydesc=="Hispanic" | ethnicitydesc=="African American"
	collapse (sum) s_enrollment_byrace, by(dimyear insttypelist instlist semesterdesc ethnicitydesc) 
	sort  instlist dimyear semesterdesc ethnicitydesc
reshape wide s_enrollment_byrace, i(dimyear insttypelist instlist semesterdesc)  j(ethnicitydesc) string	
* replace missings with zeros since that means nobody met the condition
foreach x of varlist  s_enrollment_byraceAsian s_enrollment_byraceBlackHispanic s_enrollment_byraceInternational s_enrollment_byraceOther s_enrollment_byraceWhite {
  replace `x' = 0 if missing(`x') 
  }
merge 1:1 dimyear instlist semesterdesc using "$outty\temp5"
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment
sort  instlist dimyear semesterdesc
drop _merge
g s_blackhispanic_enrollment_share = s_enrollment_byraceBlackHispanic / s_enrollment
g s_enrollment_mrty = s_enrollment_byraceBlackHispanic 
g s_mrty_enrollment_share=s_blackhispanic_enrollment_share 
drop s_enrollment_byraceAsian s_enrollment_byraceInternational s_enrollment_byraceOther s_enrollment_byraceWhite
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment
sort  instlist dimyear semesterdesc
save "$outty\temp5a", replace  

* s_enrollment_byage  enrollment by age
import delimited "$outty\enrollment_by_age RFS.csv", clear  stringcols(2,3,4,5) numericcols(6)
replace instlist=strrtrim(instlist) 
	replace semesterdesc="Summer" if semesterdesc=="Summer I" | semesterdesc=="Summer II"
	collapse (sum) s_enrollment_byage, by(dimyear insttypelist instlist semesterdesc agegroupdesc) 
	sort  instlist dimyear semesterdesc agegroupdesc
reshape wide s_enrollment_byage, i(dimyear insttypelist instlist semesterdesc)  j(agegroupdesc) string	
drop s_enrollment_byageUnknown
foreach x of varlist  s_enrollment_byagelt18 s_enrollment_byage18_21 s_enrollment_byage22_24  s_enrollment_byage25_29 s_enrollment_byage30_34 s_enrollment_byage35_50 s_enrollment_byage51gteq {
  replace `x' = 0 if missing(`x') 
  }
merge 1:1 dimyear instlist semesterdesc using "$outty\temp5a"
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment s_enrollment_byagelt18
sort  instlist dimyear semesterdesc
drop _merge
save "$outty\temp5b", replace  

* s_enrollment_female enrollment by gender (not used)
import delimited "$outty\enrollment_by_gender RFS.csv" , clear  stringcols(2,3,4,5) numericcols(6)   
replace instlist=strrtrim(instlist) 
	keep if genderdesc == "Female"
	replace semesterdesc="Summer" if semesterdesc=="Summer I" | semesterdesc=="Summer II"
	collapse (sum) count , by(dimyear insttypelist instlist semesterdesc ) 
	sort  instlist  dimyear   semesterdesc 	
	g s_enrollment_female = count
	drop count
merge 1:1 dimyear instlist semesterdesc using "$outty\temp5b"
drop _merge
g s_female_enroll_share = s_enrollment_female / s_enrollment
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
save "$outty\temp5c", replace  


* a_enrollment_ugstatus    undergraduate (first-time, transfer, other) variable is available only for public universities and fall semester only (used in appendix)
import delimited "$outty\raw grad enrollment RFS.csv", clear  stringcols(2,3,4) numericcols(5)
replace instlist=strrtrim(instlist) 
rename a_enrollment_ugstatus a_enrollment_ug
g ugstatusdesc="tranfer" 
replace ugstatusdesc="enter" if classificationdesc=="First-time Undergraduate"
replace ugstatusdesc="previous" if classificationdesc=="Other Undergraduate"
drop classificationdesc
sort  instlist  dimyear   
reshape wide a_enrollment_ug, i(dimyear insttypelist instlist) j(ugstatusdesc) string
merge 1:m dimyear instlist  using "$outty\temp5c"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc
sort  instlist dimyear semesterdesc
merge 1:m dimyear instlist semesterdesc using "$outty\temp5c"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
save "$outty\temp5d", replace  


* a_ftdegreeseek_enrollment full time degree-seeking fall enrollment ONLY AVAIL IN FALL 2016-2020, not enough for a pre-trend (not used)
import delimited "$outty\raw ft degree seeking enrollment RFS.csv", clear  stringcols(2,3) numericcols(4)
replace instlist=strrtrim(instlist) // remove trailing blanks
merge 1:m dimyear instlist  using "$outty\temp5d"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
save "$outty\temp5e", replace  



* GRADUATION RATE & TIME TO DEGREE ================================================================================================
* graduation rate (univ 4 5 6) (College 3 4 6)   (2014-2020 available) (used)
import delimited "$outty\raw grad rate 3 4 6 RFS.csv", clear  stringcols(2,3) numericcols(4,5)
replace instlist=strrtrim(instlist) 
* calculate grad rate wi 5 years at uni and 4 years at com colleges
g a_yr54_grad_rate = grad_rate_bytime  if (timeframe == 5 & insttypelist=="Public Universities") | (timeframe == 4 & insttypelist!="Public Universities") 
collapse (mean)   a_yr54_grad_rate    , by(dimyear insttypelist instlist ) 
merge 1:m dimyear instlist  using "$outty\temp5e"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
save "$outty\temp6a", replace  

*years to degree (used)--------------------------
import delimited "$outty\raw time to degree RFS.csv", clear  stringcols(2,3) numericcols(4)
replace instlist=strrtrim(instlist) 
rename a_time_to_degree a_years_to_degree
merge 1:m dimyear instlist  using "$outty\temp6a"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
save "$outty\temp6b", replace  


* STUDENT DEBT AND PELL GRANTS AND TUITION ================================================================================================
* a_share_grads_w_debt  pct of graduates with debt (used) --------------
import delimited "$outty\raw pct grads w debt RFS.csv", clear  stringcols(2,3) numericcols(4)
replace instlist=strrtrim(instlist) 
merge 1:m dimyear instlist  using "$outty\temp6b"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
save "$outty\temp7a", replace  

* a_avgstudentdebt  student loan debt (used) --------------------------
import delimited "$outty\raw avg stud debt RFS.csv", clear  stringcols(2,3) numericcols(4)
replace instlist=strrtrim(instlist) 
merge 1:m dimyear instlist  using "$outty\temp7a"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
save "$outty\temp7b", replace  

/* students receiving pell grants (not used because the THECB failed to update this data beyond 2019 by Oct 2021 download) --------------------- */
import delimited "$outty\raw count pell grants RFS.csv", clear  stringcols(2,3,4) numericcols(5)
replace instlist=strrtrim(instlist) 
replace pelldesc="NoPell" if pelldesc == "No Pell" 
reshape wide a_enrollment_, i(dimyear instlist insttypelist)  j(pelldesc) string
egen tot = rowtotal(a_enrollment_NoPell a_enrollment_Pell)
g a_pell_enrollment_share = a_enrollment_Pell / tot
drop a_enrollment_NoPell  tot
merge 1:m dimyear instlist  using "$outty\temp7b"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
save "$outty\temp7c", replace  

*tuition and fees (used) ---------------------
import delimited "$outty\raw tuition RFS.csv", clear  stringcols(2,3) numericcols(4)
replace instlist=strrtrim(instlist) 
merge 1:m dimyear instlist  using "$outty\temp7c"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
replace semesterdesc="Fall" if missing(semesterdesc)
save "$outty\temp7d", replace  


/* +++ 
ENROLLMENT AND EARNINGS BY MAJOR ================================================================================================
  Research assistant, Ananya Radhakrishnan, downloaded major earnings data from Census Bureau's Post-Secondary Employment Outcomes (PSEO) data in June 2020 and merged the earnings data
  on CIP with the THECB enrollment by major data. To maximize the match rate between the THECB and Census data (which has missing information for some cohort years and CIP combinations),
  she averaged the different percentile earnings for each major across the most recent three available cohort years (2010, 2011, and 2013). When there is no CIP earnings, the earnings is 
  set to the median of the corresponding CIP group. And, when there is no CIP group salary for the major, she sometimes classified missings as zeros based on internet searches about the 
  job. For example "32010100 - Basic Skills and Developmental/Remedial Education, General" is classified as earnings zero because this ensures it is always classified as a below-median 
  "low" earnings major. Same for military majors since incomes according to Defense Finance and Accounting Service would also be low. We tested other treatments of missings -- i.e. 
  setting them to missing and dropping them from analyses and results were similar because these tend to be low-earnings majors. */ 
  import delimited "$outty\enrollment by major and earnings.csv" , clear  stringcols(2,3,4,5,6,7) numericcols(8,9,10,11,12,13,14,15,16,17,18)  
* drop if y10_p50 == 0 // note that the results are very similar when zeros are dropped
* "$outty\enrollment by major and earnings extra screen.csv" // alternatively, this version has no zeros and additional effort to fill in missings with available information, but results nearly identical.
replace instlist=strrtrim(instlist) 
drop if semesterdesc=="Summer" | semesterdesc=="Summer I" | semesterdesc=="Summer II" // summer semester is not included in analyses
tab semesterdesc
* convert semesters to quarters for easier event study graphs later
g yq=yq(dimyear,4)
format yq %tq	
replace yq=yq(dimyear,1) if semesterdesc=="Spring"
keep if enrollment_by_cip!=0 // no obs deleted bc not a balanced panel, if there is zero enrollment in a school-major-date, it is not present
* CSV file has undeclared set to zero earnings, but undeclared wi LSA set to avg LSA earnings. Change undeclared wi LSA set to zero:
foreach x of varlist   y1_p25	y1_p50	y1_p75	y5_p25	y5_p50	y5_p75	y10_p25	y10_p50	y10_p75  {
  replace `x' = 0 if undeclared==1  // this puts undecared at the bottom of the CIP earnings list
}
replace cipgroupdesc_short = "Undeclared" if undeclared==1  // to relable undeclared wi LSA as undeclared
* create quartiles of major earnings by school
egen quartile_y1_p25=xtile(y1_p25), n(4)   by(instlist)  //instlist insttypelist
egen quartile_y1_p50=xtile(y1_p50), n(4) by(instlist) 
egen quartile_y1_p75=xtile(y1_p75), n(4) by(instlist) 
egen quartile_y5_p25=xtile(y5_p25), n(4) by(instlist) 
egen quartile_y5_p50=xtile(y5_p50), n(4) by(instlist) 
egen quartile_y5_p75=xtile(y5_p75), n(4) by(instlist) 
egen quartile_y10_p25=xtile(y10_p25), n(4) by(instlist) 
egen quartile_y10_p50=xtile(y10_p50), n(4) by(instlist) 
egen quartile_y10_p75=xtile(y10_p75), n(4) by(instlist) 
* set undeclared to missing a quartile earnings group so that it is not included in the summation below
foreach Tk of varlist 	quartile_y1_p25 quartile_y1_p50 quartile_y1_p75 ///
						    quartile_y5_p25 quartile_y5_p50 quartile_y5_p75 ///
						    quartile_y10_p25 quartile_y10_p50 quartile_y10_p75  ///
					{
	replace `Tk' = . if undeclared==1
}
* define low and high as a median cut (for now)
g lo_y1_p25 = (quartile_y1_p25==1 | quartile_y1_p25==2)   
g hi_y1_p25 = (quartile_y1_p25==3 | quartile_y1_p25==4)   
g lo_y1_p50 = (quartile_y1_p50==1 | quartile_y1_p50==2)   
g hi_y1_p50 = (quartile_y1_p50==3 | quartile_y1_p50==4)   
g lo_y1_p75 = (quartile_y1_p75==1 | quartile_y1_p75==2)   
g hi_y1_p75 = (quartile_y1_p75==3 | quartile_y1_p75==4)   
g lo_y5_p25 = (quartile_y5_p25==1 | quartile_y5_p25==2)   
g hi_y5_p25 = (quartile_y5_p25==3 | quartile_y5_p25==4)   
g lo_y5_p50 = (quartile_y5_p50==1 | quartile_y5_p50==2)   
g hi_y5_p50 = (quartile_y5_p50==3 | quartile_y5_p50==4)   
g lo_y5_p75 = (quartile_y5_p75==1 | quartile_y5_p75==2)   
g hi_y5_p75 = (quartile_y5_p75==3 | quartile_y5_p75==4)   
g lo_y10_p25 = (quartile_y10_p25==1 | quartile_y10_p25==2)   
g hi_y10_p25 = (quartile_y10_p25==3 | quartile_y10_p25==4)   
g lo_y10_p50 = (quartile_y10_p50==1 | quartile_y10_p50==2)   
g hi_y10_p50 = (quartile_y10_p50==3 | quartile_y10_p50==4)   
g lo_y10_p75 = (quartile_y10_p75==1 | quartile_y10_p75==2)  
g hi_y10_p75 = (quartile_y10_p75==3 | quartile_y10_p75==4)  
drop quartile_y1_p25 quartile_y1_p50 quartile_y1_p75 quartile_y5_p25 quartile_y5_p50 quartile_y5_p75 quartile_y10_p25 quartile_y10_p50 quartile_y10_p75
* assign enrollment to a major group before summing to the school level 
g s_enrollment_lo_y1_p25 = enrollment_by_cip * lo_y1_p25
g s_enrollment_hi_y1_p25 = enrollment_by_cip * hi_y1_p25
g s_enrollment_lo_y1_p50 = enrollment_by_cip * lo_y1_p50
g s_enrollment_hi_y1_p50 = enrollment_by_cip * hi_y1_p50
g s_enrollment_lo_y1_p75 = enrollment_by_cip * lo_y1_p75
g s_enrollment_hi_y1_p75 = enrollment_by_cip * hi_y1_p75
g s_enrollment_lo_y5_p25 = enrollment_by_cip * lo_y5_p25
g s_enrollment_hi_y5_p25 = enrollment_by_cip * hi_y5_p25
g s_enrollment_lo_y5_p50 = enrollment_by_cip * lo_y5_p50
g s_enrollment_hi_y5_p50 = enrollment_by_cip * hi_y5_p50
g s_enrollment_lo_y5_p75 = enrollment_by_cip * lo_y5_p75
g s_enrollment_hi_y5_p75 = enrollment_by_cip * hi_y5_p75
g s_enrollment_lo_y10_p25 = enrollment_by_cip * lo_y10_p25
g s_enrollment_hi_y10_p25 = enrollment_by_cip * hi_y10_p25
g s_enrollment_lo_y10_p50 = enrollment_by_cip * lo_y10_p50
g s_enrollment_hi_y10_p50 = enrollment_by_cip * hi_y10_p50
g s_enrollment_lo_y10_p75 = enrollment_by_cip * lo_y10_p75
g s_enrollment_hi_y10_p75 = enrollment_by_cip * hi_y10_p75
* undeclared enrollment #
g s_enrollment_undeclared  = enrollment_by_cip * undeclared
g log_enrollment_by_cip = ln(enrollment_by_cip) 
sort  dimyear instlist semesterdesc
by dimyear instlist semesterdesc : egen sum_enrollment = total(enrollment_by_cip)
g cip_enrollment_share = enrollment_by_cip / sum_enrollment
save "$outty\school_major_year RFS", replace  

use "$outty\school_major_year RFS", clear 
* Sum to enrollment in low vs. high earning majors to the school-semester level
	replace semesterdesc="Summer" if semesterdesc=="Summer I" | semesterdesc=="Summer II"
	collapse (sum)  enrollment_by_cip ///
					s_enrollment_lo_y1_p25 s_enrollment_hi_y1_p25 s_enrollment_lo_y1_p50 s_enrollment_hi_y1_p50 s_enrollment_lo_y1_p75 s_enrollment_hi_y1_p75 ///
					s_enrollment_lo_y5_p25 s_enrollment_hi_y5_p25 s_enrollment_lo_y5_p50 s_enrollment_hi_y5_p50 s_enrollment_lo_y5_p75 s_enrollment_hi_y5_p75 ///
					s_enrollment_lo_y10_p25 s_enrollment_hi_y10_p25 s_enrollment_lo_y10_p50 s_enrollment_hi_y10_p50 s_enrollment_lo_y10_p75 s_enrollment_hi_y10_p75 ///
					s_enrollment_undeclared , by(dimyear insttypelist instlist semesterdesc) 
	sort  instlist  dimyear   semesterdesc
* log enrollment variables
foreach Tk of varlist  s_enrollment_lo_y1_p25 s_enrollment_hi_y1_p25 s_enrollment_lo_y1_p50 s_enrollment_hi_y1_p50 s_enrollment_lo_y1_p75 s_enrollment_hi_y1_p75 ///
					s_enrollment_lo_y5_p25 s_enrollment_hi_y5_p25 s_enrollment_lo_y5_p50 s_enrollment_hi_y5_p50 s_enrollment_lo_y5_p75 s_enrollment_hi_y5_p75 ///
					s_enrollment_lo_y10_p25 s_enrollment_hi_y10_p25 s_enrollment_lo_y10_p50 s_enrollment_hi_y10_p50 s_enrollment_lo_y10_p75 s_enrollment_hi_y10_p75 ///
					{
	g log_`Tk' = ln(`Tk' )  
	replace log_`Tk'=. if missing(`Tk')
}
	* merge in full data at semester level
merge 1:m dimyear instlist semesterdesc using "$outty\temp7d"
drop _merge 
order  dimyear semesterdesc instlist insttypelist   regiondesc s_enrollment 
sort  instlist dimyear semesterdesc
* define schools with a lot of low earnings (calle "high risk" at the time) enrollment
g s_high_risk_enroll_share = s_enrollment_lo_y10_p50 / (enrollment_by_cip)  // 10-year earnings amount is below median -- "share of enrollment in low earnings majors" 
replace s_high_risk_enroll_share=. if missing(s_high_risk_enroll_share)
g s_enroll_undeclared_share = s_enrollment_undeclared / (enrollment_by_cip) 
drop enrollment_by_cip
save "$outty\school_data_bf_clean", replace  


/* +++ */
*CLEAN DATA -----------------------------------
use "$outty\school_data_bf_clean", clear  
drop if dimyear<2014
drop if semesterdesc=="Summer" | semesterdesc=="Summer I" | semesterdesc=="Summer II" // Summer semester is never used in analysis 

* generate dates out of 2 semesters
* quarters
tab semesterdesc
g yq=yq(dimyear,4)
format yq %tq	
replace yq=yq(dimyear,1) if semesterdesc=="Spring"
* half years
g yh=yh(dimyear,2)
format yh %th
replace yh=yh(dimyear,1) if semesterdesc=="Spring"

sort  instlist dimyear yq yh semesterdesc     
order  instlist dimyear yq yh semesterdesc  insttypelist   regiondesc

g insttypelist_adj = "College"
replace insttypelist_adj = "University" if insttypelist=="Public Universities"

tab insttypelist  if yq == yq(2017, 1)
replace insttypelist=insttypelist_adj
drop insttypelist_adj

save "$outty\temp13", replace  


/* +++ */
/*----------spring 2017 fixed measures-------------------*/
use "$outty\temp13", clear  
keep if yq == yq(2017, 1)  // TREATMENT IS DEFINED ACCORDING TO SPRING SEMESTER 2017
global highcutoff = 0.50  //  used to split ranked variables at a percentile
				
keep instlist    a_tuitionfees_p30hours   s_dallas_enrollment_share s_disaster_enrollment_share instlist insttypelist  
				   
rename a_tuitionfees_p30hours a_tuitionfees_p30hours_2017q1
* pct rank by type
by insttypelist, sort: egen n = count(a_tuitionfees_p30hours_2017q1)
by insttypelist: egen i = rank(a_tuitionfees_p30hours_2017q1), track 
gen pranktype_tuition_2017q1 = (i) / (n) 
drop n i 
* median split by type
g hightype_tuition_2017q1 = (pranktype_tuition_2017q1>$highcutoff)

* DALLAS (PLACEBO) TREATMENT VARIABLES  
rename s_dallas_enrollment_share  s_dallas_enrollshare_2017q1
* median split by type
bysort insttypelist : egen typemedian_dallas_enrollshare = xtile(s_dallas_enrollshare_2017q1), nq(2) 
	g typetop50_dal_enrollshare_2017q1 = (typemedian_dallas_enrollshare==2)  
	g typebot50_dal_enrollshare_2017q1 = (typemedian_dallas_enrollshare==1)	
* pct rank by type
by insttypelist, sort: egen n = count(s_dallas_enrollshare_2017q1)
by insttypelist: egen i = rank(s_dallas_enrollshare_2017q1), track 
gen pcranktype_dal_enroll_2017q1 = (i) / (n) 
drop n i 	
	
* MAIN TREATMENT VARIABLE
rename s_disaster_enrollment_share s_disaster_enrollshare_2017q1
* median split by type
bysort insttypelist : egen typemedian_disaster_enrollshare = xtile(s_disaster_enrollshare_2017q1), nq(2) 
	g typetop50_dis_enrollshare_2017q1 = (typemedian_disaster_enrollshare==2) 
	g typebot50_dis_enrollshare_2017q1 = (typemedian_disaster_enrollshare==1)    
* pct rank by type
by insttypelist, sort: egen n = count(s_disaster_enrollshare_2017q1)
by insttypelist: egen i = rank(s_disaster_enrollshare_2017q1), track 
gen pcranktype_dis_enroll_2017q1 = (i) / (n) 
drop n i 
/*--------------------------------*/

* merge the spring 2017 cross sectional data into main dataset
merge 1:m instlist  using "$outty\temp13"
* Note that 10 universities either closed or stopped reporting after spring 2020 (due to pandemic). They are set to missing in our regressions after spring 2020.
drop _merge 

sort  instlist dimyear yq yh semesterdesc     
order  instlist dimyear yq yh semesterdesc  insttypelist   regiondesc


* create quarterly dummies
	gen d2014q1 = (yq == yq(2014, 1))
	gen d2014q2 = (yq == yq(2014, 2))
	gen d2014q3 = (yq == yq(2014, 3))
	gen d2014q4 = (yq == yq(2014, 4))
	gen d2015q1 = (yq == yq(2015, 1))
	gen d2015q2 = (yq == yq(2015, 2))
	gen d2015q3 = (yq == yq(2015, 3))
	gen d2015q4 = (yq == yq(2015, 4))
	gen d2016q1 = (yq == yq(2016, 1))
	gen d2016q2 = (yq == yq(2016, 2))
	gen d2016q3 = (yq == yq(2016, 3))
	gen d2016q4 = (yq == yq(2016, 4))
	gen d2017q1 = (yq == yq(2017, 1))
	gen d2017q2 = (yq == yq(2017, 2))
	gen d2017q3 = (yq == yq(2017, 3))
	gen d2017q4 = (yq == yq(2017, 4))
	gen d2018q1 = (yq == yq(2018, 1))
	gen d2018q2 = (yq == yq(2018, 2))
	gen d2018q3 = (yq == yq(2018, 3))
	gen d2018q4 = (yq == yq(2018, 4))
	gen d2019q1 = (yq == yq(2019, 1))
	gen d2019q2 = (yq == yq(2019, 2))
	gen d2019q3 = (yq == yq(2019, 3))
	gen d2019q4 = (yq == yq(2019, 4))	
	gen d2020q1 = (yq == yq(2020, 1))
	gen d2020q2 = (yq == yq(2020, 2))
	gen d2020q3 = (yq == yq(2020, 3))
	gen d2020q4 = (yq == yq(2020, 4))
	gen d2021q1 = (yq == yq(2021, 1))
	gen d2021q2 = (yq == yq(2021, 2))
	gen d2021q3 = (yq == yq(2021, 3))
	gen d2021q4 = (yq == yq(2021, 4))	
* create annual dummies
	gen y2014 = (dimyear == 2014)
	gen y2015 = (dimyear == 2015)
	gen y2016 = (dimyear == 2016)
	gen y2017 = (dimyear == 2017)
	gen y2018 = (dimyear == 2018)
	gen y2019 = (dimyear == 2019)
	gen y2020 = (dimyear == 2020)
	gen y2021 = (dimyear == 2020)

egen long semesterdesc_ = group(semesterdesc)
egen long instlist_ = group(instlist)
egen long regiondesc_ = group(regiondesc)
egen long insttypelist_ = group(insttypelist)
tostring dimyear, generate(dimyear2)
egen region_year= concat(regiondesc_ dimyear2)
egen long region_year_ = group(region_year)
tostring yq, generate(yq2)
egen region_yq= concat(regiondesc_ yq2)
egen long region_yq_ = group(region_yq)
save "$outty\temp14", replace  

/* +++ +++ */
use "$outty\temp14", clear  	
/* Disaster counties enrollment share (MUST CHANGE DEPENDING ON HOW YOU WANT TO SPLIT TREATMENT (MEDIAN VS. PERCENTILE RANK; DISASTER VS DALLAS) 
   MAIN ANALYSIS USES (DISASTER COUNTY MEDIAN SPLIT): typetop50_dis_enrollshare_2017q1 */
global enrollmentsharemeasure typetop50_dis_enrollshare_2017q1  // pcranktype_dis_enroll_2017q1 typetop50_dis_enrollshare_2017q1     APPENDIX PLACEBO DALLAS: pcranktype_dal_enroll_2017q1 typetop50_dal_enrollshare_2017q1
forvalues z = 14/21 {
	gen d20`z'q1_x_treat2 = $enrollmentsharemeasure * d20`z'q1
	gen d20`z'q2_x_treat2= $enrollmentsharemeasure * d20`z'q2
/*	gen d20`z'q3_x_treat2 = $enrollmentsharemeasure * d20`z'q3  SEMESTERS HAVE BEEN CONVERTED TO QUARTERS TO GRAPH MORE EASILY, Q1 = SPRING, Q2 = SUMMER (DROPPED FROM ANALYSES), Q3=Nothing; Q4 = FALL */
	gen d20`z'q4_x_treat2= $enrollmentsharemeasure * d20`z'q4
	
}
forvalues z = 14/21 {
	gen d20`z'_x_treat2 = $enrollmentsharemeasure * y20`z'
}
* semester pre vs. post treatment
g POSTs =  (yq>yq(2017,2)) 
g treat2_x_POSTs = $enrollmentsharemeasure * POSTs
* univ interactions
g univ = (insttypelist=="University"	)
g POSTs_x_univ = POSTs * univ
g treat2_x_univ = $enrollmentsharemeasure * univ
g treat2_x_POSTs_x_univ = $enrollmentsharemeasure * POSTs * univ
* College interactions
g comm = (insttypelist=="College"	)
g POSTs_x_comm = POSTs * comm
g treat2_x_comm = $enrollmentsharemeasure * comm
g treat2_x_POSTs_x_comm = $enrollmentsharemeasure * POSTs * comm
* can't interact POSTa_x_comm or POSTa_x_univ because collinear with the type x date FE


g treat2_x_POSTs_x_tuitT = $enrollmentsharemeasure * POSTs * pranktype_tuition_2017q1
g treat2_x_POSTs_x_tuitTH = $enrollmentsharemeasure * POSTs * hightype_tuition_2017q1
g POSTs_x_tuitT = POSTs * pranktype_tuition_2017q1
g POSTs_x_tuitTH = POSTs * hightype_tuition_2017q1

* annual pre vs. post treatment
g POSTa =  (dimyear>2017) 
g treat2_x_POSTa = $enrollmentsharemeasure * POSTa
g treat2_x_POSTa_x_univ = $enrollmentsharemeasure * POSTa * univ
g treat2_x_POSTa_x_comm = $enrollmentsharemeasure * POSTa * comm
* can't interact POSTa_x_comm or POSTa_x_univ because collinear with the type x date FE

g treat2_x_POSTa_x_tuitT = $enrollmentsharemeasure * POSTa * pranktype_tuition_2017q1
g treat2_x_POSTa_x_tuitTH = $enrollmentsharemeasure * POSTa * hightype_tuition_2017q1
g POSTa_x_tuitTH = POSTa * hightype_tuition_2017q1

* transform possible outcome variables
g log_s_enrollment = ln(s_enrollment)
g log_s_enrollment_female = ln(s_enrollment_female)

* outcomes for which a school could have a legitimate zero that is not a missing
foreach Tk of varlist a_avgstudentdebt a_ftdegreeseek_enrollment s_enrollment_txtuitex a_enrollment_ugenter  a_enrollment_ugprevious  a_enrollment_ugtranfer  ///
					  s_enrollment_mrty	a_num_degrees_econdisadv a_enrollment_Pell	s_enrollment_byagelt18 s_enrollment_byage18_21 s_enrollment_byage22_24 s_enrollment_byage25_29	///
					  s_enrollment_byage30_34 s_enrollment_byage35_50 s_enrollment_byage51gteq		{
	g log_`Tk' = ln(`Tk' +1)  
	replace log_`Tk'=. if missing(`Tk')
}

keep if yq<yq(2021,2) // spring 2021 is the last semester of most available variables as of Oct 2021

sort instlist yq 
by instlist: gen nqtr = [_N] // count the number of quarters that each cid has observations for
egen max_nqtr = max(nqtr) // calculate the maximum number of quarters per instlist in our dataset: 14 
keep if nqtr>=12 // this drops 4 schools that did not report consistent data or closed (opened) before (after) Harvey.

order instlist dimyear yq yh semesterdesc instlist insttypelist   regiondesc  

save "$outty\s_texasdata", replace  



				
				
/* +++ */		
/*11111111111111111111111111111111111 DESCRIPTIVE STATS 1111111111111111111111111111111111*/

* APPENDIX Table A1: Descriptive statistics of higher education outcomes, school-level, Spring 2017
* ALL
clear
use "$outty\s_texasdata" , clear
sort    instlist dimyear yq
keep if dimyear==2017
keep if semesterdesc=="Spring" 
drop if s_enrollment==0 | a_years_to_degree==0 | a_yr54_grad_rate==0 
eststo clear  
sort  insttypelist
by  insttypelist:eststo: estpost su   s_enrollment s_disaster_enrollment_share  s_high_risk_enroll_share   s_mrty_enrollment_share  a_years_to_degree a_yr54_grad_rate  ///
						 a_share_grads_w_debt a_avgstudentdebt a_tuitionfees_p30hours a_pell_enrollment_share a_econdisadv_degreeshare   ///
					, detail
esttab using "$outty/TableA1_Part1.csv", replace cells("mean(pattern(1 1) fmt(2)) sd(pattern(1 1))  p10(pattern(1 1 1)) p50(pattern(1 1 1)) p90(pattern(1 1 1))") label    title("Descriptive statistics for  Texas public universities and colleges, pre- vs. post-Hurricane")

* BY TREATMENT
clear
use "$outty\s_texasdata" , clear
sort    instlist dimyear yq
keep if dimyear==2017
keep if semesterdesc=="Spring" 
drop if s_enrollment==0 | a_years_to_degree==0 | a_yr54_grad_rate==0 
eststo clear  
sort  insttypelist typetop50_dis_enrollshare_2017q1
by  insttypelist typetop50_dis_enrollshare_2017q1:eststo: estpost su   s_enrollment s_disaster_enrollment_share  s_high_risk_enroll_share   s_mrty_enrollment_share  a_years_to_degree a_yr54_grad_rate  ///
						 a_share_grads_w_debt a_avgstudentdebt a_tuitionfees_p30hours a_pell_enrollment_share a_econdisadv_degreeshare   ///
					, detail
esttab using "$outty/TableA1_Part2.csv", replace cells("mean(pattern(1 1) fmt(2)) sd(pattern(1 1))  p10(pattern(1 1 1)) p50(pattern(1 1 1)) p90(pattern(1 1 1))") label    title("Descriptive statistics for  Texas public universities and colleges, pre- vs. post-Hurricane")



/* +++ +++ */
*++++++++++++++++++++++++++++++++++ DISASTER ENROLLMENT EXPOSURE EVENT STUDY GRAPHS ++++++++++++++++++++++++++++++++++++++++

/* Figure 3: Event study effects of above-median enrollment exposure on school-level outcomes
   Note: To reproduce Appendix Figure A11: Event study effects of ranked enrollment exposure on school-level outcomes, 
   You must first rerun data above after changing the treatment variable from the median split to the percentile rank measure of disaster exposure, 
   also set triangle markers to cirles manually below. */
   
* semester level enrollment variables, panels a and b
clear
use "$outty\s_texasdata" , clear
global error_clusts  vce(robust) 
global error_clust1 vce(cluster region_yq_ ) 
foreach depvar of varlist    log_s_enrollment    log_s_enrollment_byage18_21        {
preserve
statsby _b _se , clear: quietly reghdfe  `depvar'   ///	
		d2014q1_x_treat2 d2014q4_x_treat2	d2015q1_x_treat2 d2015q4_x_treat2	d2016q1_x_treat2 d2016q4_x_treat2 ///
		/*	d2017q1_x_treat2  control */ 	d2017q4_x_treat2  	 ///			 
 		d2018q1_x_treat2 d2018q4_x_treat2	d2019q1_x_treat2 d2019q4_x_treat2	d2020q1_x_treat2 d2020q4_x_treat2	 d2021q1_x_treat2 					///							
		 ,  ///
		absorb(  i.instlist_  i.yq    i.insttypelist_#i.yq  i.region_year_  )  $error_clust1 
 g _se_d2017q1_x_treat2 = 0
 g _b_d2017q1_x_treat2 = 0
keep *x_treat* 
xpose, clear  varname  
g beta_se = substr(_varname,1,3) 
replace beta_se = subinstr(beta_se, "_", "",.) 
replace _varname = subinstr(_varname, "_b_", "",.) 
replace _varname = subinstr(_varname, "_se_", "",.) 
replace _varname = subinstr(_varname, "d", "",.) 
replace _varname = subinstr(_varname, "_x_treat2", "",.) 
gen date = quarterly(_varname, "YQ") 
format date %tq
reshape wide v1, i(date) j(beta_se, string)
rename v1b b_
rename v1se se_
gen ci_low_ = b_ - (1960/1000)*se_  // 1960 95% CI  
gen ci_high_ = b_ + (1960/1000)*se_
drop   se_
* Graph coefficient and the confidence interval for each treatment group on each date
#delimit ;
twoway  (scatter b_ date , mcolor(black) msymbol(T) /*O T CHANGE FOR Figure A11*/  lcolor(black) lpattern(solid) )
		(rspike  ci_low_ ci_high_ date , color(gray)  ) ,
		legend(order(1 2 )) legend(order(1 "Exposure (enrollment from disaster counties)" 2 "95% CI"  ) ring(0)  position(7) ) 
		title(`depvar' (All))
		ytitle("Treatment Effect" ) xtitle("")
		yline(0, lcolor(black)) xline(230, lcolor(black) lpattern(dash)) 
		graphregion(margin(small) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white))	 ;
#delimit cr ;
 	graph save "$outty/Event_median_`depvar'.gph", replace	// rank_ vs. median_  CHANGE FOR Figure A11
restore	
}  // close dependent



* annual measures, panels c, d, e, f
clear
use "$outty\s_texasdata" , clear
global error_clust2 vce(cluster region_year_) 
sort    instlist dimyear yq
* keeps the spring semester (which is the time period of reference in annual data)
 quietly by   instlist dimyear: gen dup = cond(_N==1,0,_n)
 drop if dup>1
foreach depvar of varlist     a_yr54_grad_rate  a_years_to_degree log_a_avgstudentdebt	a_share_grads_w_debt   { 
preserve
statsby _b _se , clear: quietly reghdfe  `depvar'   ///	
		 d2014_x_treat2   d2015_x_treat2 d2016_x_treat2	/*	d2017_x_treat2  control */ 	d2018_x_treat2	d2019_x_treat2	d2020_x_treat2     ///			 			 
		     ,  ///
		absorb( i.instlist_  i.dimyear     i.insttypelist_#i.dimyear  i.region_year_  )  $error_clust2
keep *x_treat*  
 g _se_d2017_x_treat2 = .
 g _b_d2017_x_treat2 = 0
xpose, clear  varname  
g beta_se = substr(_varname,1,3) 
replace beta_se = subinstr(beta_se, "_", "",.) 
replace _varname = subinstr(_varname, "_b_", "",.) 
replace _varname = subinstr(_varname, "_se_", "",.) 
replace _varname = subinstr(_varname, "d", "",.) 
replace _varname = subinstr(_varname, "_x_treat2", "",.) 
sort  beta_se _varname
gen date1=date(_varname,"Y")
gen date = year(date1) 
drop date1 _varname
reshape wide v1, i(date) j(beta_se, string)
rename v1b b_
rename v1se se_
gen ci_low_ = b_ - (1960/1000)*se_  // 1960 95% CI
gen ci_high_ = b_ + (1960/1000)*se_
drop   se_ 
#delimit ;
twoway  (scatter b_ date , mcolor(black) msymbol(T) /* O T CHANGE FOR Figure A11 */  lcolor(black) lpattern(solid) )
		(rspike  ci_low_ ci_high_ date , color(gray)  ) ,
		legend(order(1 2 )) legend(order(1 "Exposure (enrollment from disaster counties)" 2 "95% CI"  ) ring(0)  position(7)  ) 
		title(`depvar' (All))
		ytitle("Treatment Effect" ) xtitle("")
		yline(0, lcolor(black)) xline(2017.33, lcolor(black)) 
		graphregion(margin(small) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white))	 ;
#delimit cr ;
	graph save "$outty/Event_median_`depvar'.gph", replace	// rank_ vs. median_  CHANGE FOR Figure A11
restore	
}  // close dependent	


		
/* Final Figures ------------------------------------------------------ */          
* BUILD Figure 3
#d
			graph combine  
			"$outty/Event_median_log_s_enrollment"  "$outty/Event_median_log_s_enrollment_byage18_21" 	
				"$outty/Event_median_a_yr54_grad_rate"   "$outty/Event_median_a_years_to_degree" 
				 "$outty/Event_median_log_a_avgstudentdebt" "$outty/Event_median_a_share_grads_w_debt" 
			,   rows(3) cols(2) fxsize(100) fysize(200) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white)) 
			 play("$outty/Enroll_Event_AboveMedian_RFS1.grec")    imargin(4 4 3 3)  ;
#d cr

* BUILD APPENDIX Figure A11
/* To reproduce Appendix Figure A11, you must first rerun data and after changing the treatment variable from the median split to the percentile rank measure of disaster exposure:
	pcranktype_dis_enroll_2017q1  */
#d
			graph combine  
			"$outty/Event_rank_log_s_enrollment"  "$outty/Event_rank_log_s_enrollment_byage18_21" 	
				"$outty/Event_rank_a_yr54_grad_rate"   "$outty/Event_rank_a_years_to_degree" 
				 "$outty/Event_rank_log_a_avgstudentdebt" "$outty/Event_rank_a_share_grads_w_debt" 
			,   rows(3) cols(2) fxsize(100) fysize(200) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white)) 
			 play("$outty/Enroll_Event_rank_RFS1.grec")  imargin(4 4 3 3)  ;
#d cr


/* APPENDIX PLACEBO TEST: 
Figure A13: Placebo test of the effect of enrollment exposure to Dallas counties on school-level outcomes
  To reproduce, must rerun data and change treatment measure to : typetop50_dal_enrollshare_2017q1 (panel A) and pcranktype_dal_enroll_2017q1 (panel B) */
#d
			graph combine  
						// median
						"$outty/DalEvent_median_log_s_enrollment" "$outty/DalEvent_median_a_yr54_grad_rate" "$outty/DalEvent_median_a_years_to_degree" 
						// rank
						"$outty/DalEvent_rank_log_s_enrollment" "$outty/DalEvent_rank_a_yr54_grad_rate" "$outty/DalEvent_rank_a_years_to_degree" 
			,   rows(2) cols(3) fxsize(130) fysize(50) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(large) color(white)) 
			  play("$outty/Enroll_Event_BothPanels_RFS1.grec")  imargin(4 4 3 3)  ;
#d cr
/* -------------------------------------------------------------------- */ 


			
*8888888888888888888888888888888888 REGRESSION TABLES 888888888888888888888888888888888888888888888888888888888
* Table 4: Effect of above-median enrollment exposure on school-level outcomes (columns 5-10)
clear
use "$outty\s_texasdata" , clear
sort    instlist dimyear yq
quietly by   instlist dimyear: gen dup = cond(_N==1,0,_n)
drop if dup>1
sort instlist dimyear yq
order instlist dimyear yq
global error_clusta vce(cluster region_year_)  
eststo clear 
foreach m of varlist  a_yr54_grad_rate a_years_to_degree     {
*--------------------- disaster exposure ------------------
* all
eststo: quietly	reghdfe  `m'    treat2_x_POSTa    		  			 										, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_year_  )  $error_clusta 
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
* robust errors
eststo: quietly	reghdfe  `m'    treat2_x_POSTa    		  			 										, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_year_  )  vce(robust)
estadd local Samp  	 "Robust"
estadd ysumm
estadd scalar r = e(r2_a)
*ex gulf coast
eststo: quietly	reghdfe  `m'    treat2_x_POSTa    		  			 			if school_in_disaster_county!=1						, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_year_  )  $error_clusta 
estadd local Samp  	 "exDisCounty"
estadd ysumm
estadd scalar r = e(r2_a)
}
		esttab using "$outty/Table4_C5_C10.csv", replace cells(b(star fmt(3)) t(par fmt(2)))   ///
		stats(N r ymean  Samp , label(N "AdjR2" "Y-mean" "Sample") fmt(0 2 2 0))    ///
		order( treat2_x_POSTa ) drop(_cons)  ///
		star(* 0.10 ** 0.05 *** 0.01) compress 
		

* Table 4: Effect of above-median enrollment exposure on school-level outcomes (columns 1-4)
clear
use "$outty\s_texasdata" , clear
eststo clear 
global error_clusrs vce(cluster region_yq_ ) 
keep if !missing(s_enrollment)
*-------------------------------------------------------
foreach m of varlist    log_s_enrollment_byage18_21  { 
*--------------------- disaster exposure ------------------
* all
eststo: quietly	reghdfe  log_s_enrollment     treat2_x_POSTs    					, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_yq_  )  $error_clusrs 
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)

eststo: quietly	reghdfe  `m'     treat2_x_POSTs    				, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_yq_  )  $error_clusrs 
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
* robust errors
eststo: quietly	reghdfe  `m'    treat2_x_POSTs    		  			 							, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_yq_  )  vce(robust)
estadd local Samp  	 "Robust"
estadd ysumm
estadd scalar r = e(r2_a)
*ex. Disasater county
eststo: quietly	reghdfe  `m'    treat2_x_POSTs    		  if school_in_disaster_county!=1				, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_yq_  )  $error_clusrs 
estadd local Samp  	 "ExDis"
estadd ysumm
estadd scalar r = e(r2_a)

}
		esttab using "$outty/Table4_C1_C4.csv", replace cells(b(star fmt(3)) t(par fmt(2)))   ///
		stats(N r ymean  Samp, label(N "AdjR2" "Y-mean" "Sample") fmt(0 2 2 0))    ///
		order( treat2_x_POSTs   ) drop( _cons* )  ///
		star(* 0.10 ** 0.05 *** 0.01) compress 
		


		
* Table 5: Effect of above-median enrollment exposure on school-level enrollment, by school-type and tuition rank
clear
use "$outty\s_texasdata" , clear
eststo clear 
global error_clusrs vce(cluster region_yq_ ) // vce(robust)
/* Tuition split options:  tuitT = pranktype_tuition_2017q1  (percentile rank within school type) ; tuitTH = hightype_tuition_2017q1 (above median, split within school type) */
global T  tuitT 
keep if !missing(s_enrollment)
*-------------------------------------------------------
foreach m of varlist   log_s_enrollment   { 
*--------------------- disaster exposure ------------------
* college interaction
eststo: quietly	reghdfe  `m'    treat2_x_POSTs treat2_x_POSTs_x_comm							, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_yq_ )  $error_clusrs
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
* college interaction, log_s_enrollment_byage18_21
eststo: quietly	reghdfe  log_s_enrollment_byage18_21    treat2_x_POSTs treat2_x_POSTs_x_comm							, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_yq_ )  $error_clusrs
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
*  by tuition above median within type
eststo: quietly	reghdfe  `m'    treat2_x_POSTs    	 treat2_x_POSTs_x_tuitTH 	POSTs_x_tuitTH				, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq i.region_yq_  )  $error_clusrs
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
* by percentile rank tuition
eststo: quietly	reghdfe  `m'    treat2_x_POSTs    	 treat2_x_POSTs_x_$T  	POSTs_x_$T				, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq i.region_yq_  )  $error_clusrs
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
* by percentile rank tuition & college interaction
eststo: quietly	reghdfe  `m'    treat2_x_POSTs   treat2_x_POSTs_x_comm 	 treat2_x_POSTs_x_$T  	POSTs_x_$T				, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq i.region_yq_  )  $error_clusrs
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
* by percentile rank tuition within colleges
eststo: quietly	reghdfe  `m'    treat2_x_POSTs    	 treat2_x_POSTs_x_$T  	POSTs_x_$T		if insttypelist=="College"			, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq i.region_yq_  )  $error_clusrs
estadd local Samp  	 "Col"
estadd ysumm
estadd scalar r = e(r2_a)
* by percentile rank tuition within uni
eststo: quietly	reghdfe  `m'    treat2_x_POSTs    	 treat2_x_POSTs_x_$T  	POSTs_x_$T		if insttypelist=="University"			, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq i.region_yq_  )  $error_clusrs
estadd local Samp  	 "Univ"
estadd ysumm
estadd scalar r = e(r2_a)
}
		esttab using "$outty/Table5.csv", replace cells(b(star fmt(3)) t(par fmt(2)))   ///
		stats(N r ymean  Samp, label(N "AdjR2" "Y-mean" "Sample") fmt(0 2 2 0))    ///
		order( treat2_x_POSTs   treat2_x_POSTs_x_comm  treat2_x_POSTs_x_tuitTH treat2_x_POSTs_x_$T  POSTs_x_tuitTH	POSTs_x_$T		 ) drop( _cons* )  ///
		star(* 0.10 ** 0.05 *** 0.01) compress 
		
		

* Table A2: Effect of above-median enrollment exposure on undergraduate enrollment in public universities, fall semesters
clear
use "$outty\s_texasdata" , clear
eststo clear 
global error_clusrs vce(cluster region_yq_ )
/* since enrollment by undergraduate status is available only in fall semesters and only for some public universities, we must make the 
 following restriction so that all the base sample of school is the same for all regression columns (otherwise they can't be compared) */
keep if semesterdesc=="Fall"
keep if insttypelist=="University"
keep if !missing(a_enrollment_ugenter)
keep if !missing(s_enrollment)
foreach m of varlist    log_s_enrollment s_enrollment  log_a_enrollment_ugenter a_enrollment_ugenter  log_a_enrollment_ugprevious a_enrollment_ugprevious  log_a_enrollment_ugtranfer a_enrollment_ugtranfer   { 
* all
eststo: quietly	reghdfe  `m'     treat2_x_POSTs    				, ///
							absorb( i.instlist_  i.yq     i.insttypelist_#i.yq  i.region_yq_  )  $error_clusrs 
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)

}
		esttab using "$outty/TableA2.csv", replace cells(b(star fmt(3)) t(par fmt(2)))   ///
		stats(N r ymean  Samp, label(N "AdjR2" "Y-mean" "Sample") fmt(0 2 2 0))    ///
		order( treat2_x_POSTs   ) drop( _cons* )  ///
		star(* 0.10 ** 0.05 *** 0.01) compress 
		
		
/* +++ */		
/*-------------------------------------------------------------------------------------------------------------------------
88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
APX D Enrollment: School-Major-time regressions 
88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
--------------------------------------------------------------------------------------------------------------------------- */	

* make a school-major-semester level dataset
use "$outty\school_major_year RFS", clear  
merge m:1 dimyear instlist semesterdesc using "$outty\s_texasdata"
keep if _merge==3
drop _merge

egen long cipdesc_ = group(cipdesc)
egen cipdesc_yq = concat(cipdesc_ yq2)
egen long cipdesc_yq_ = group(cipdesc_yq)
egen instlist_cipdesc = concat(instlist_ cipdesc_)
egen long instlist_cipdesc_ = group(instlist_cipdesc)

global error_clusrs vce(cluster region_yq_ ) 

/* The below automatically produces Table D1 Panel A. 
	note: to reproduce Table D1 Panel B, you must first rerun the data above using the ranked disaster county treatment intensity measure: pcranktype_dis_enroll_2017q1 */
g treat2_x_POSTs_x_undec = $enrollmentsharemeasure * POSTs * undeclared
g treat2_x_POSTs_x_loEarnY1 = $enrollmentsharemeasure * POSTs * lo_y1_p50 // change measure lo_y10_p50 lo_y1_p50 lo_y10_p50
g treat2_x_POSTs_x_hiEarnY1 = $enrollmentsharemeasure * POSTs * hi_y1_p50 // change measure lo_y10_p50 lo_y1_p50 lo_y10_p50
g treat2_x_POSTs_x_loEarnY5 = $enrollmentsharemeasure * POSTs * lo_y5_p50 // change measure lo_y10_p50 lo_y1_p50 lo_y10_p50
g treat2_x_POSTs_x_hiEarnY5 = $enrollmentsharemeasure * POSTs * hi_y5_p50 // change measure hi_y10_p50 hi_y1_p50 hi_y10_p50
g treat2_x_POSTs_x_loEarnY10 = $enrollmentsharemeasure * POSTs * lo_y10_p50 // change measure lo_y10_p50 lo_y1_p50 lo_y10_p50
g treat2_x_POSTs_x_hiEarnY10 = $enrollmentsharemeasure * POSTs * hi_y10_p50 // change measure lo_y10_p50 lo_y1_p50 lo_y10_p50
drop if missing(lo_y10_p50) // make sure sample is consistent with graphs in main text

*Table D1: Difference-in-difference estimates of higher education enrollment, school-major level
eststo clear 
foreach m of varlist   enrollment_by_cip    { 
*(1)
eststo: quietly	reghdfe  `m'      treat2_x_POSTs	 				, ///
							absorb(  i.instlist_cipdesc_     i.insttypelist_#i.yq  i.region_yq_  i.cipdesc_yq_ )  $error_clusrs /*vce(robust)*/
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
*(2)
eststo: quietly	reghdfe  `m'    treat2_x_POSTs  treat2_x_POSTs_x_loEarnY1 treat2_x_POSTs_x_loEarnY5 treat2_x_POSTs_x_loEarnY10   treat2_x_POSTs_x_undec	 	  			 							, ///
							absorb(  i.instlist_cipdesc_     i.insttypelist_#i.yq   i.region_yq_  i.cipdesc_yq_ )  $error_clusrs 
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
*Earnings undec (3)
eststo: quietly	reghdfe  `m'    treat2_x_POSTs  treat2_x_POSTs_x_loEarnY10   treat2_x_POSTs_x_undec		  			 							, ///
							absorb(  i.instlist_cipdesc_    i.insttypelist_#i.yq  i.region_yq_  i.cipdesc_yq_ )  $error_clusrs 
estadd local Samp  	 "All"
estadd ysumm
estadd scalar r = e(r2_a)
*(4)
eststo: quietly	reghdfe  `m'     treat2_x_POSTs treat2_x_POSTs_x_loEarnY10   treat2_x_POSTs_x_undec if insttypelist!="Public Universities"	  		  			 							, ///
							absorb(  i.instlist_cipdesc_       					i.region_yq_  i.cipdesc_yq_ )  $error_clusrs 
estadd local Samp  	 "Coll"
estadd ysumm
estadd scalar r = e(r2_a)
*(5)
eststo: quietly	reghdfe  `m'     treat2_x_POSTs treat2_x_POSTs_x_loEarnY10   treat2_x_POSTs_x_undec if insttypelist=="Public Universities"	  		  			 							, ///
							absorb(  i.instlist_cipdesc_    						 i.region_yq_  i.cipdesc_yq_ )  $error_clusrs 
estadd local Samp  	 "Univ"
estadd ysumm
estadd scalar r = e(r2_a)
}
		esttab using "$outty/TableD1.csv", replace cells(b(star fmt(3)) t(par fmt(2)))   ///
		stats(N r ymean  Samp, label(N "AdjR2" "Y-mean" "Sample") fmt(0 2 2 0))    ///
		drop(_cons) order( treat2_x_POSTs treat2_x_POSTs_x_loEarnY1  treat2_x_POSTs_x_loEarnY5   treat2_x_POSTs_x_loEarnY10   treat2_x_POSTs_x_undec	 )  ///
		star(* 0.10 ** 0.05 *** 0.01) compress 		
		




		
		
/*-------------------------------------------------------------------------------------------------------------------------
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ENROLLMENT BY MAJOR (CIP GROUP) SUMMARY GRAPHS
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
---------------------------------------------------------------------------------------------------------------------------*/
* Figure 6: Second difference in average enrollment shares at Texas public universities and colleges, by major group
*reduce the orginal semster level data to key variables
use "$outty\s_texasdata" , clear
keep dimyear insttypelist instlist semesterdesc yq s_enrollment pcranktype_dis_enroll_2017q1 typetop50_dis_enrollshare_2017q1     
sort  instlist yq
save "$outty\tempy1", replace  
*pull in enrollment by major
use "$outty\school_major_year RFS", clear  
	collapse (sum) enrollment_by_cipgroup=enrollment_by_cip (mean) y1_p50	y5_p50 y10_p50 , by( instlist yq cipgroupdesc_short) 
	sort  instlist yq
*merge major data with original regression dataset
merge m:1  instlist yq using "$outty\tempy1"
keep if _merge==3
drop _merge
order  yq instlist insttypelist   cipgroupdesc_short s_enrollment pcranktype_dis_enroll_2017q1 typetop50_dis_enrollshare_2017q1 
sort  yq instlist 
* calculate major enrollment share for each school-time
g p_enrollment_by_cipgroup=enrollment_by_cipgroup/s_enrollment
* define post as in regressions (fall 2017 and after)
g POSTs =  (yq>yq(2017,2)) 
save "$outty\prep_major_data", replace  
* identify majors that had zero enrollment in a given year, since these are majors that might have been not offered by the school
use "$outty\prep_major_data" , clear																							
* collapse to the year-school-major average
sort instlist cipgroupdesc_short POSTs 
collapse (mean) enrollment_by_cipgroup     ///
				, by(instlist cipgroupdesc_short dimyear ) 
* transpose to drop majors that were eliminated and then take the post-pre change in enrollment within a school-major
reshape wide enrollment_by_cipgroup , i(instlist cipgroupdesc_short  ) j(dimyear )
sort instlist cipgroupdesc_short 
* replace missings with zeros since that means that the major existed at one point for that school but that nobody is enrolled in a given year
foreach x of varlist   enrollment_by_cipgroup2014  enrollment_by_cipgroup2015  ///
						enrollment_by_cipgroup2016  enrollment_by_cipgroup2017  ///
						enrollment_by_cipgroup2018  enrollment_by_cipgroup2019   enrollment_by_cipgroup2020 enrollment_by_cipgroup2021 {  
  replace `x' = 0 if missing(`x')
}
* generate list of school-majors to drop because the major had a year of zero enrollment, hence the school may not have offered the major that year.
g dropmajor = 0
replace dropmajor=1 if enrollment_by_cipgroup2014==0 | enrollment_by_cipgroup2015==0 | enrollment_by_cipgroup2016==0 | enrollment_by_cipgroup2017==0 | ///
		enrollment_by_cipgroup2018==0 | enrollment_by_cipgroup2019==0  | enrollment_by_cipgroup2020==0 | enrollment_by_cipgroup2021==0  
keep instlist cipgroupdesc_short dropmajor		
save "$outty\tempy2", replace  
* merge the list of dropped majors in with the original data		
use "$outty\prep_major_data" , clear
sort instlist cipgroupdesc_short 
*merge major data with original regression dataset
merge m:1 instlist cipgroupdesc_short using "$outty\tempy2"
keep if _merge==3
drop _merge 
drop if dropmajor==1 // drop majors that have zero enrollment in a given year
* average enrollment to the post vs. pre period-school-major enrollment average
sort instlist cipgroupdesc_short POSTs 
 drop if dimyear>2020 | dimyear<2014 
collapse (mean) enrollment_by_cipgroup p_enrollment_by_cipgroup  typetop50_dis_enrollshare_2017q1  ///
				, by(instlist cipgroupdesc_short POSTs  ) 
* define high and low exposure
g exposure_group = 2
replace exposure_group = 3 if typetop50_dis_enrollshare_2017q1==1  // high exposure
replace exposure_group = 1 if typetop50_dis_enrollshare_2017q1==0  // low exposure
* transpose to take the post-pre change in enrollment within a school-major
reshape wide enrollment_by_cipgroup p_enrollment_by_cipgroup, i(instlist cipgroupdesc_short  exposure_group) j(POSTs )
sort instlist cipgroupdesc_short 
* generate the post - pre period average enrollment by school-major
g d_enrollment_by_cipgroup=enrollment_by_cipgroup1-enrollment_by_cipgroup0
g dp_enrollment_by_cipgroup=p_enrollment_by_cipgroup1-p_enrollment_by_cipgroup0
* ADDITIONAL RULES
drop if enrollment_by_cipgroup0<5 // drop school level programs with  less than 5 students on average during the pre-period, (See Fig 6 note) 
drop if p_enrollment_by_cipgroup0<.01 //  drop school level programs that represented less than 1% of a school’s total enrollment (See Fig 6 note) 
save "$outty\tempy3", replace  
* merge back in major earnings, taking the median CIP earnings within the CIP group (this only affects the ordering or the majors in the graphs)
use "$outty\prep_major_data" , clear
drop if missing(y10_p50)
collapse (median) y1_p50 y5_p50 y10_p50 ///
				, by( cipgroupdesc_short) 
sort y10_p50
merge 1:m  cipgroupdesc_short using "$outty\tempy3"
keep if _merge==3
drop _merge 
* since we use 10-year earnins for these graphs, must drop any majors with missing 10 year earnings data
drop if y10_p50 ==0 & cipgroupdesc_short!="Undeclared" 
drop if y10_p50 ==. & cipgroupdesc_short!="Undeclared" 
save "$outty\enrollment_by_major_for_excel_graph", replace  


* //////////////////////////////// MAJOR SHIFTING GRAPHS ///////////////////////////////////////////
* PRE-POST High-Low EXPOSURE CHANGES IN ENROLLMENT %S  {&Delta} 
use "$outty\enrollment_by_major_for_excel_graph" , clear
* average to the major-exposure group level	 
collapse (sum) d_enrollment_by_cipgroup (mean) dp_enrollment_by_cipgroup y1_p50 y5_p50 y10_p50 , by(cipgroupdesc_short exposure_group) 
reshape wide d_enrollment_by_cipgroup dp_enrollment_by_cipgroup, i(cipgroupdesc_short) j(exposure_group )
drop if missing(dp_enrollment_by_cipgroup1) | missing(dp_enrollment_by_cipgroup3)
g rd_enrollment_by_cipgroup = d_enrollment_by_cipgroup3-d_enrollment_by_cipgroup1
g rdp_enrollment_by_cipgroup= dp_enrollment_by_cipgroup3-dp_enrollment_by_cipgroup1
* Figure 6: Second difference in average enrollment shares at Texas public universities and colleges, by major group
* PRE-POST High-Low EXPOSURE CHANGES IN ENROLLMENT %S																					
graph hbar rdp_enrollment_by_cipgroup,  over(cipgroupdesc_short, sort(y10_p50) label(labsize(vsmall))) ///
			 ytitle("{&Delta} in Avg. {it:Enrollment Share} for Post-Pre & High-Low {it:Treatment}") ///
			graphregion(margin(tiny) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white)) bar(1, color(gray) lcolor(black)) 

	
			
			
			
/*-------------------------------------------------------------------------------------------------------------------------
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
APX ENROLLMENT BY MAJOR (CIP) SUMMARY GRAPHS
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
---------------------------------------------------------------------------------------------------------------------------*/
* Figure A14: Change in enrollment in Texas public universities and colleges, by major (CIP)
*pull in enrollment by major
use "$outty\school_major_year RFS", clear  
	drop if semesterdesc=="Summer" | semesterdesc=="Summer I" | semesterdesc=="Summer II" 
	drop if yq>=yq(2020,1) // Test that major shit not driven by COVID, instead the shift is found before covid.
	collapse (sum) enrollment_by_cip=enrollment_by_cip (mean) y1_p50 y5_p50 y10_p50 , by(yq instlist cipdesc) 
	sort   instlist yq
*merge major data with original regression dataset
merge m:1  instlist yq using "$outty\tempy1"
keep if _merge==3
drop _merge
order  dimyear semesterdesc instlist insttypelist   cipdesc s_enrollment pcranktype_dis_enroll_2017q1 typetop50_dis_enrollshare_2017q1 
sort  instlist dimyear
* calculate major enrollment share for each school-time
g p_enrollment_by_cip=enrollment_by_cip/s_enrollment
* define post as in regressions (fall 2017 and after)
g POSTs =  (yq>yq(2017,2)) 
save "$outty\prep_major_data", replace  
* identify majors that had zero enrollment in a given year, since these are majors that might have been not offered by the school
use "$outty\prep_major_data" , clear
* collapse to the year-school-major average
sort instlist cipdesc POSTs 
collapse (mean) enrollment_by_cip      ///
				, by(instlist cipdesc dimyear ) 
* transpose to drop majors that were eliminated and then take the post-pre change in enrollment within a school-major
reshape wide enrollment_by_cip , i(instlist cipdesc  ) j(dimyear )
sort instlist cipdesc 
* replace missings with zeros since that means that the major existed at one point for that school but that nobody is enrolled in a given year
foreach x of varlist   enrollment_by_cip2014  enrollment_by_cip2015  ///
						enrollment_by_cip2016  enrollment_by_cip2017  ///
						enrollment_by_cip2018  enrollment_by_cip2019 /* enrollment_by_cip2020 enrollment_by_cip2021 */ {  // <<  must comment out later years FOR REPRODUCING Figure A14 BECAUSE MUST DROP AFTER COVID SEMESTER TO SHOW FINDINGS ROBUST 		  !!!
  replace `x' = 0 if missing(`x')
}
* generate list of school-majors to drop because the major had a year of zero enrollment, hence the school may not have offered the major that year.
g dropmajor = 0
replace dropmajor=1 if enrollment_by_cip2014==0 | enrollment_by_cip2015==0 | enrollment_by_cip2016==0 | enrollment_by_cip2017==0 | ///
		enrollment_by_cip2018==0 | enrollment_by_cip2019==0  /* | enrollment_by_cip2020==0  | enrollment_by_cip2021==0 */ // <<  must comment out later years FOR REPRODUCING Figure A14 BECAUSE MUST DROP AFTER COVID SEMESTER TO SHOW FINDINGS ROBUST 		  !!!
keep instlist cipdesc dropmajor		
save "$outty\tempy2", replace  
* merge the list of dropped majors in with the original data		
use "$outty\prep_major_data" , clear
sort instlist cipdesc 
*merge major data with original regression dataset
merge m:1 instlist cipdesc using "$outty\tempy2"
keep if _merge==3
drop _merge 
drop if dropmajor==1 // drop majors that have zero enrollment in a given year 
* average enrollment to the post vs. pre period-school-major enrollment average
sort instlist cipdesc POSTs 
collapse (mean) enrollment_by_cip p_enrollment_by_cip typetop50_dis_enrollshare_2017q1  ///
				, by(instlist cipdesc POSTs )   
* define high and low exposure
g exposure_group = 2
replace exposure_group = 3 if typetop50_dis_enrollshare_2017q1==1 
replace exposure_group = 1 if typetop50_dis_enrollshare_2017q1==0  
* transpose to take the post-pre change in enrollment within a school-major
reshape wide enrollment_by_cip p_enrollment_by_cip, i(instlist cipdesc exposure_group ) j(POSTs )
sort instlist cipdesc exposure_group
* generate the post - pre period average enrollment by school-major
g d_enrollment_by_cip=enrollment_by_cip1-enrollment_by_cip0
g dp_enrollment_by_cip=p_enrollment_by_cip1-p_enrollment_by_cip0
* ADDITIONAL RULES
drop if enrollment_by_cip0<5 // FIG A14 CAPTION: we drop all schoolmajor combinations in which enrollment was zero during any year in our sample, had less than 5 students on average during the pre-period,
drop if p_enrollment_by_cip0<.01 // FIG A14 CAPTION: or represented less than 1% of a school’s total enrollment, on average, over the pre-period.
save "$outty\tempy3", replace  
* merge back in major earnings, taking the median CIP earnings within the CIP group (this only affects the ordering or the majors in the graphs)
use "$outty\prep_major_data" , clear
drop if missing(y10_p50)
collapse (median) y1_p50 y5_p50 y10_p50 ///
				, by( cipdesc) 
sort y10_p50
merge 1:m  cipdesc using "$outty\tempy3"
keep if _merge==3
drop _merge 
save "$outty\enrollment_by_major_for_excel_graph", replace  


//////////////////////////////// APPENDIX MAJOR SHIFTING GRAPHS ///////////////////////////////////////////
* PRE-POST High-Low EXPOSURE CHANGES IN ENROLLMENT %S  {&Delta} 
use "$outty\enrollment_by_major_for_excel_graph" , clear
* aggregated (#s) and average (%s) to the major-exposure group level	 
collapse (sum) d_enrollment_by_cip (mean) dp_enrollment_by_cip y1_p50 y5_p50 y10_p50, by(cipdesc exposure_group) 
reshape wide d_enrollment_by_cip dp_enrollment_by_cip, i(cipdesc) j(exposure_group )
drop if missing(dp_enrollment_by_cip1) | missing(dp_enrollment_by_cip3)
g rd_enrollment_by_cip = d_enrollment_by_cip3-d_enrollment_by_cip1
g rdp_enrollment_by_cip= dp_enrollment_by_cip3-dp_enrollment_by_cip1
sort y10_p50
drop if abs(rdp_enrollment_by_cip)<0.006  // FIG A14 CAPTION: To fit the hundreds of majors onto one graph, we further restrict the sample to majors with an average enrollment share second difference of at least 0.6 of a percentage point.
* strip numbers from string
forval n = 0/9{
    replace cipdesc = subinstr(cipdesc, "`n'", "",.)
}
replace cipdesc = subinstr(cipdesc, " - ", "",.)
drop if y10_p50 ==0 & cipdesc!="Undeclared" 
drop if y10_p50 ==. & cipdesc!="Undeclared" 
replace cipdesc = "Humanities"   if cipdesc=="Liberal Arts and Sciences, General Studies and Humanities" // must shorten to fit
*Figure A14: Change in enrollment in Texas public universities and colleges, by major (CIP) 
* PANEL A
* PRE-POST High-Low EXPOSURE CHANGES IN ENROLLMENT %S 	
graph hbar rdp_enrollment_by_cip,  over(cipdesc, sort(y10_p50) label(labsize(vsmall))) ///
			 ytitle("{&Delta} in Avg. {it:Enrollment Share} for Post-Pre & High-Low {it:Treatment}")  yscale(range(-0.10 0.05)) ///
			graphregion(margin(tiny) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white)) bar(1, color(gray) lcolor(black)) 
*Figure A14: Change in enrollment in Texas public universities and colleges, by major (CIP) 
* PANEL B
* PRE-POST High EXPOSURE CHANGES IN ENROLLMENT #s 																								
replace d_enrollment_by_cip3 = d_enrollment_by_cip3/1000	
graph hbar d_enrollment_by_cip3,  over(cipdesc, sort(y10_p50) label(labsize(vsmall))) ///
			 ytitle("{&Delta} Post-Pre in Aggregate {it:Enrollment} for High {it:Treatment} Schools (Thousands of People)")  yscale(range(-0.10 0.05)) ///
			graphregion(margin(tiny) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white)) bar(1, color(white) lcolor(black)) 
 
 
 
 
 /* +++  */
 /*-------------------------------------------------------------------------------------------------------------------------
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
NONPARAMETRIC ENROLLMENT GRAPHS 
Figures 4, 5, and A12 
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
---------------------------------------------------------------------------------------------------------------------------*/

*GRAD RATE MEAN BY TREATMENT INTENSITY (annual data) ---------------------------------------------
* used in Figure A12 Panel B
clear all
use "$outty\s_texasdata" , clear
sort    instlist dimyear yq
quietly by   instlist dimyear: gen dup = cond(_N==1,0,_n)
drop if dup>1
*DETREND
reg a_yr54_grad_rate i.yq#i.insttypelist_   
predict re, residuals
rename  re r_a_yr54_grad_rate
local z 33  // 33rd percentile break is used in Figure A12 Panel B
local h dis //  dis   , just stands for disaster 
keep if typetop`z'_`h'_enrollshare_2017q1 ==1   | typebot`z'_`h'_enrollshare_2017q1==1
* Then take the mean  by treatment group and date
collapse (mean)    r_a_yr54_grad_rate    a_yr54_grad_rate       , by(dimyear typetop`z'_`h'_enrollshare_2017q1 )  
sort dimyear  
foreach outcome of varlist    r_a_yr54_grad_rate       a_yr54_grad_rate        {
sort dimyear
#d 
graph twoway (line `outcome' dimyear if typetop`z'_`h'_enrollshare_2017q1 == 0 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   yline(0, lcolor(black))  xline(2017, lcolor(black) lpattern(dash)) ) 	// yq=229 corresponds with 2017q2, these put a solid line at y=0 and at x=2016q2
			 (line `outcome' dimyear if typetop`z'_`h'_enrollshare_2017q1 == 1 , lpattern(shortdash dot) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "Bottom `z'%" 2 "Top `z'% of Treat") col(2) pos(12) ring(1)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("`outcome'",size(medium)) xtitle("")
;
#d cr
graph save "$outty/nonpar_`outcome'`z'_`h'.gph", replace
} // close dependent


* GRAD RATE MEAN NOT BY TREATEMENT -------------------------------------------------------------
* used in Figure A12 Panel B
clear all
use "$outty\s_texasdata" , clear
sort    instlist dimyear yq
quietly by   instlist dimyear: gen dup = cond(_N==1,0,_n)
drop if dup>1
drop if missing(typetop50_dis_enrollshare_2017q1)
collapse (mean) a_yr54_grad_rate   , by(dimyear )  
sort dimyear  
foreach outcome of varlist  a_yr54_grad_rate       {
sort dimyear
#d 
graph twoway (line `outcome' dimyear , lpattern(solid) lcolor(black) lwidth(thick) mcolor(black) msymbol(O)   yline(0, lcolor(black)) xline(2017, lcolor(black) lpattern(dash)))  ,
	   scheme(s2mono)  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("`outcome'",size(medium)) xtitle("")
;
#d cr
graph save "$outty/nonpar_`outcome'.gph", replace
} // close dependent



* MEAN AND AGGREGATE ENROLLMENT BY TREATMENT INTENSITY ---------------------------------------------
* Figure 4 and Figure A12 (Panel A)
clear all
use "$outty\s_texasdata" , clear
*DETREND
* regress the school outcome on a date dummy  , then keep the associated residuals 
reg s_enrollment i.yq  
predict re, residuals
rename  re r_s_enrollment
local z 33 //  CHANGE: 20 33  FIGURE A12 Panel A uses both the 33rd percentile and the 20th percentile. Must run both.
local h dis //  dis  , just stands for disaster 
keep if typetop`z'_`h'_enrollshare_2017q1 ==1   | typebot`z'_`h'_enrollshare_2017q1==1
* Then take the mean of the residuals by treatment group and date,
collapse (mean)  r_s_enrollment s_enrollment     (sum) sum_s_enrollment= s_enrollment sum_r_s_enrollment = r_s_enrollment , by(yq typetop`z'_`h'_enrollshare_2017q1 )  
sort yq  // need to sort so that the lines connect in order
foreach outcome of varlist  r_s_enrollment s_enrollment  sum_r_s_enrollment sum_s_enrollment           {
sort yq	
#d 
graph twoway (line `outcome' yq if typetop`z'_`h'_enrollshare_2017q1 == 0 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   yline(0, lcolor(black))  xline(228, lcolor(black) lpattern(dash)) ) 	// yq=229 corresponds with 2017q2, these put a solid line at y=0 and at x=2016q2
			 (line `outcome' yq if typetop`z'_`h'_enrollshare_2017q1 == 1 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "Bottom `z'%" 2 "Top `z'% of Treat") col(2) pos(12) ring(1)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("`outcome'",size(medium)) xtitle("")
;
#d cr
graph save "$outty/nonpar_`outcome'`z'_`h'.gph", replace
} // close dependent


 
* AGGREGATE AND MEAN ENROLLMENT NOT BY TREATEMENT -------------------------------------------------------------
* Figure 4 and Figure A12 (Panel A)
clear all
use "$outty\s_texasdata" , clear
drop if missing(typetop50_dis_enrollshare_2017q1)
* 10 schools closed during the pandemic, which is why the aggregate graph dips down in Fall 2020 
* Then take the mean of the residuals by treatment group and date,
collapse (mean)   s_enrollment     (sum) sum_s_enrollment= s_enrollment  , by(yq )  
sort yq  
foreach outcome of varlist  s_enrollment  sum_s_enrollment  {
sort yq	
#d 
graph twoway (line `outcome' yq , lpattern(solid) lcolor(black) lwidth(thick) mcolor(black) msymbol(O)   yline(0, lcolor(black))  xline(228, lcolor(black) lpattern(dash)) )  ,
	  scheme(s2mono)  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("`outcome'",size(medium)) xtitle("")
;
#d cr
graph save "$outty/nonpar_`outcome'.gph", replace
} // close dependent




* MEAN ENROLLMENT BY TREATMENT INTENSITY AND MAJOR EARNINGS FOR COLLEGES ---------------------------------------------
* Figure 5: Trends in average enrollment in low-earnings majors in Texas colleges
*  pull in enrollment by major
use "$outty\school_major_year RFS", clear  
local e 10 //  10 year earnings median earnings is used to divide into low earnings majors
	drop if semesterdesc=="Summer" | semesterdesc=="Summer I" | semesterdesc=="Summer II" 
	collapse (sum) enrollment_by_cip=enrollment_by_cip (mean) lo_y`e'_p50 , by(dimyear semesterdesc instlist cipdesc) 
	sort  dimyear semesterdesc instlist
*merge major data with original regression dataset
merge m:1 dimyear instlist semesterdesc using "$outty\s_texasdata"
keep if _merge==3
drop _merge
* GRAPH: detrend
reg enrollment_by_cip i.yq#i.insttypelist_   
predict re, residuals
rename  re r_enrollment_by_cip
keep if insttypelist=="College" 
local z 33 //  33rd percentile used to break schools into bottom middle and top treatment
local h dis //  dis  (just stands for disaster)
keep if typetop`z'_`h'_enrollshare_2017q1 ==1   | typebot`z'_`h'_enrollshare_2017q1==1
	* take school-level agregates
	collapse (mean) r_enrollment_by_cip enrollment_by_cip (sum) sum_r_enrollment_by_cip=r_enrollment_by_cip sum_enrollment_by_cip=enrollment_by_cip , by(yq  typetop`z'_`h'_enrollshare_2017q1 lo_y`e'_p50) 
	sort   yq
	sort  yq typetop`z'_dis_enrollshare_2017q1 lo_y`e'_p50
	drop if missing(typetop`z'_dis_enrollshare_2017q1)
foreach depvar of varlist  r_enrollment_by_cip  enrollment_by_cip    {
sort yq
#d 
graph twoway (line `depvar' yq if typetop`z'_dis_enrollshare_2017q1 == 0 & lo_y`e'_p50==1 , lpattern(solid) lcolor(black)   xline(228, lcolor(black) lpattern(dash)) ) 	// yq=228 corresponds with 2017q1  
			 (line `depvar' yq if typetop`z'_dis_enrollshare_2017q1 == 1 & lo_y`e'_p50==1 , lpattern(solid) lwidth(thick) lcolor(blue)  ) 			 ,
	  legend(order(1 "Bottom `z'%"  2 "Top `z'% of Treat" ) col(2) pos(12) ring(1)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("`depvar'",size(medium)) xtitle("")
;
#d cr  
graph save "$outty/nonpar_`depvar'`z'_`h'.gph", replace
} // close dependent



* mean ENROLLMENT in Low EARNINGS FOR COLLEGES NOT BY TREATMENT INTENSITY ---------------------------------------------
* Figure 5: Trends in average enrollment in low-earnings majors in Texas colleges
use "$outty\school_major_year RFS", clear  
local e 10 
	drop if semesterdesc=="Summer" | semesterdesc=="Summer I" | semesterdesc=="Summer II" //  summers semesters are not included in any analysis
	collapse (sum) enrollment_by_cip=enrollment_by_cip (mean) lo_y`e'_p50 , by(dimyear semesterdesc instlist cipdesc) 
	sort  dimyear semesterdesc instlist 
*merge major data with original regression dataset
merge m:1 dimyear instlist semesterdesc using "$outty\s_texasdata"
keep if _merge==3
drop _merge
drop if missing(typetop50_dis_enrollshare_2017q1)
keep if insttypelist=="College" 
	* take school-level agregates
	collapse (mean) enrollment_by_cip (sum) sum_enrollment_by_cip=enrollment_by_cip , by(yq  lo_y`e'_p50) 
	sort   yq
	sort  yq lo_y`e'_p50
foreach depvar of varlist   enrollment_by_cip     {
sort yq
#d 
graph twoway (line `depvar' yq if lo_y`e'_p50==1 , lpattern(solid) lcolor(black) lwidth(thick) mcolor(black) msymbol(O)    xline(228, lcolor(black) lpattern(dash)) )  ,
	  scheme(s2mono)  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("`depvar'",size(medium)) xtitle("")
;
#d cr  
graph save "$outty/nonpar_`depvar'.gph", replace
} // close dependent






* FINAL FIGURES --------------------------------------------------------------------------------------------------
* Figure 4
* Average enrollment in public universities and colleges
#d
graph combine  "$outty/nonpar_s_enrollment.gph"   "$outty/nonpar_r_s_enrollment33_dis.gph" "$outty/nonpar_r_s_enrollment20_dis.gph" 
				, xcommon rows(1) cols(3) fxsize(120) fysize(45) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white))
			 play("$outty/AvgEnroll_NonPar_RFS1.grec")  
;
#d cr


* Figure A12, Panel A
*  Aggregate enrollment in public universities and colleges
#d
graph combine   "$outty/nonpar_sum_s_enrollment.gph" "$outty/nonpar_sum_r_s_enrollment33_dis.gph" "$outty/nonpar_sum_r_s_enrollment20_dis.gph"
				, xcommon rows(1) cols(3) fxsize(120) fysize(45) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white))
			 play("$outty/AgEnroll_NonPar_RFS1.grec") 
;
#d cr

* Figure A12, Panel B
* Average graduation rate (5 years university, 4 years college) at public universities and colleges
#d
graph combine     "$outty/nonpar_a_yr54_grad_rate.gph" "$outty/nonpar_a_yr54_grad_rate33_dis.gph"   "$outty/nonpar_r_a_yr54_grad_rate33_dis.gph"  
				,  rows(1) cols(3) fxsize(120) fysize(45) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white))
			 play("$outty/AvgGradRate_NonPar_RFS1.grec")  
;
#d cr

* Figure 5: Trends in average enrollment in low-earnings majors in Texas colleges
* Average enrollment in low earnings majors at public colleges
#d
graph combine    "$outty/nonpar_enrollment_by_cip.gph"  "$outty/nonpar_enrollment_by_cip33_dis.gph" 	"$outty/nonpar_r_enrollment_by_cip33_dis.gph" 
				,  xcommon rows(1) cols(4) fxsize(120) fysize(45) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white))
			 play("$outty/AvgEnrollbyMajor_NonPar_RFS1.grec") 
;
#d cr




